********************************************************************************
*					CLEAN VOTECAL DATA & MERGE TO CENSUS DATA 
********************************************************************************

// Convert the raw VoteCal block-level dBASE files for the 2002-2010 general
// elections into Stata datasets: one "vote" file and one "reg" file per year.
clear all
set more off
forvalues yr = 2002(2)2010 {
	// "vote" first, then "reg", for each election year
	foreach kind in vote reg {
		import dbase ../raw/votecal/`yr'/19_`kind'_stats_by_block_g.dbf, clear
		save ../raw/votecal/`kind'_g`yr'.dta, replace
	}
}
cap prog drop clean_subgroups
prog clean_subgroups
	// clean_subgroups, y(<election year>) t(<file type>)
	//
	// Loads ../raw/votecal/<t>_g<y>.dta, builds subgroup totals prefixed with
	// <t>_ (the caller passes t = reg or vote), keeps only the block id,
	// election year and the <t>_* variables, and saves the result to
	// ../temp/vc_<t>_<y>.dta.
	//   y : election year; stored in elec_yr and used in file names
	//   t : file-type prefix; used in file names and in every new variable name
	syntax, y(str) t(str)
	// -clear- is the documented option for -use- (the original passed -replace-)
	use ../raw/votecal/`t'_g`y'.dta, clear
	gen elec_yr = `y'
	// NOTE(review): totreg_r is used as the total for whichever file type is
	// loaded -- confirm it holds the voter count in the vote files
	gen `t'_ttl = totreg_r
	// row totals over contiguous variable ranges (dataset order matters here)
	egen `t'_hisp = rowtotal(hispdem-hispoth)
	egen `t'_asian = rowtotal(kordem-filoth)
	gen `t'_dem = dem
	gen `t'_rep = rep
	
	// age bands: sum every variable whose name ends in the band suffix
	egen `t'_1824 = rowtotal(*1824)
	egen `t'_2534 = rowtotal(*2534)
	egen `t'_3544 = rowtotal(*3544)
	egen `t'_4554 = rowtotal(*4554)
	egen `t'_5564 = rowtotal(*5564)
	egen `t'_65pl = rowtotal(*65pl)
	
	// groups 1g-9g: sum every variable whose name ends in <x>g
	forvalues x = 1/9 {
		egen `t'_`x'g = rowtotal(*`x'g)
	}
	// use the full variable name geoid10 (as the rest of the file does) rather
	// than the abbreviation geoid, so this also works with -set varabbrev off-
	keep geoid10 elec_yr `t'_*
	drop if geoid10 == ""
	save ../temp/vc_`t'_`y'.dta, replace
end 
// For each general election 2002-2010: clean the reg and vote files, merge
// them at the census-block level, and save one combined file per year.
forvalues y = 2002(2)2010 {
	// the original list began with an undefined local `t' (expands to nothing);
	// iterate explicitly over the two file types
	foreach t in reg vote {
		clean_subgroups, y(`y') t(`t')
	}
	use ../temp/vc_reg_`y'.dta,clear
	// keep registration blocks (matched or not); vote-only blocks are dropped.
	// The former assert(1 2 3) allowed every outcome and so checked nothing.
	merge 1:1 geoid10 using ../temp/vc_vote_`y'.dta, keep(1 3) nogen
	// blocks with registrants but no vote record count as zero votes
	replace vote_ttl = 0 if vote_ttl==.
	assert reg_ttl>=vote_ttl
	gen cb = geoid10
	drop if cb==""
	// coarser geography ids are prefixes of the block id
	gen cbg = substr(cb, 1, 12)
	gen ct = substr(cb, 1, 11)
	gen elec_type = "g"
	save  ../temp/vc_`y'.dta,replace
	// -erase- is the portable command; -rm- is its Mac/Unix synonym
	erase ../temp/vc_reg_`y'.dta
	erase ../temp/vc_vote_`y'.dta
	// note: contains missings --> fill below when merge
}

*** Build a census-block x election-year skeleton (2002-2010) so that the
*** merged panel has no missing CBs or years
use ../data_intermediate/main_census.dta, clear
// five copies of every observation, numbered within cb as 2002, 2004, ..., 2010
// (assumes one row per cb in main_census -- TODO confirm)
expand 5
bysort cb: generate elec_yr = 2000 + 2*_n
generate geoid10 = cb
save ../temp/census_panel.dta, replace

*** Stack the yearly vote/reg files and merge onto the census skeleton to
*** create the main dataset
use ../temp/vc_2002.dta, clear
forvalues yr = 2004(2)2010 {
	append using ../temp/vc_`yr'.dta
}
// every VoteCal row must match a census row; census-only rows are allowed
merge 1:1 geoid10 elec_yr using ../temp/census_panel.dta, assert(using match)
tabulate elec_yr _merge, missing
drop _merge
// census-only rows have no VoteCal counts: set system-missing counts to zero
foreach grp in ttl hisp asian dem rep 1824 2534 3544 4554 5564 65pl ///
	1g 2g 3g 4g 5g 6g 7g 8g 9g {
	foreach stem in reg vote {
		replace `stem'_`grp' = 0 if `stem'_`grp'==.
	}
}
generate reg = reg_ttl
generate vote = vote_ttl
assert vote_ttl <=reg_ttl

*** Make analysis variables
	// election index: 2002 -> 2, 2004 -> 3, ..., 2010 -> 6
	capture drop elec_id
	generate elec_id = (elec_yr - 1998)/2
	// reg_02: each block's 2002 registration count, repeated on all of its rows
	generate reg02 = cond(elec_yr==2002, reg, 0)
	bysort cb (elec_yr): egen reg_02 = max(reg02)
	drop reg02

	//HETEROGENEITY BY VOTER RACE
	//IMPUTE WHITE/BLACK/OTHER COUNTS BY POPULATION SHARE
	// Bring in the year-level percentages; every row is either unmatched
	// master or matched, and both are kept.
	merge m:1 elec_yr using ../data_intermediate/reg_vote_pct_2002_2010.dta, ///
		assert(master match) keep(master match) nogenerate
	// weights: block share (pct_<race>) times the matching <stem>_pct_<abbr>
	foreach s in reg vote {
		gen `s'_denom = (pct_black*`s'_pct_bla) + (pct_white*`s'_pct_whi) + (pct_other*`s'_pct_oth)
		foreach race in white black other {
			local abbr = substr("`race'", 1, 3)
			gen `s'_`race'_num =pct_`race'*`s'_pct_`abbr'
		}
	}
	gen pct_api_hisp = pct_api + pct_hisp
	foreach s in reg vote {
		// the count left after removing Hispanic and Asian is split across
		// black/white/other in proportion to the weights above
		gen `s'_nothispasian = `s'-`s'_hisp-`s'_asian
		foreach race in black white other {
			gen `s'_`race' = `s'_nothispasian * `s'_`race'_num/`s'_denom 
			// missing results (e.g. zero or missing denominator) become zero
			replace `s'_`race' = 0 if `s'_`race'==.
			replace `s'_`race' = round(`s'_`race',0.001)
		}
		gen `s'_whiteasianoth = `s'_white + `s'_asian + `s'_other
		gen `s'_whiteasian = `s'_white + `s'_asian
		gen `s'_blackhispanic = `s'_black + `s'_hisp
		// below updates `s' for hisp and asian using census data
		// 	replace `s'_hisp = `s' * hisp_num/denom_hisp_asian if denom==0 
		// 	replace `s'_asian = `s' * asian_num/denom_hisp_asian if denom==0 
		// sanity check: the race components never exceed the total
		di "race vars check"
		assert `s'>=round((`s'_asian + `s'_black + `s'_hisp + `s'_white + `s'_other),1) ///
			if `s'!=0 & `s'_denom!=.
	}

	//HETEROGENEITY BY YEARS REGISTERED
	// Pair up consecutive groups: (1g,2g) -> 0to3, (3g,4g) -> 4to7,
	// (5g,6g) -> 8to11, (7g,8g) -> 12to15; 9g alone is 16pl and
	// 7g+8g+9g is 12pl.
	foreach s in reg vote {
		local lo = 1
		foreach bin in 0to3 4to7 8to11 12to15 {
			local hi = `lo' + 1
			egen `s'_`bin' = rowtotal(`s'_`lo'g `s'_`hi'g)
			local lo = `lo' + 2
		}
		egen `s'_12pl = rowtotal(`s'_7g `s'_8g `s'_9g)
		gen `s'_16pl = `s'_9g
	}
	
	//HETEROGENEITY BY AGE
	// collapse the six age bands into three (18-34, 35-54, 55+)
	foreach s in vote reg {
		egen `s'_1834 = rowtotal(`s'_1824 `s'_2534)
		egen `s'_3554 = rowtotal(`s'_3544 `s'_4554)
		egen `s'_55pl = rowtotal(`s'_5564 `s'_65pl)
	}
	
	//HETEROGENEITY BY POLITICAL AFFILIATION
	// "ind" is the residual: total minus Democrats minus Republicans
	foreach s in reg vote {
		gen `s'_ind = `s' - `s'_dem - `s'_rep
	}

*** Keep only the variables to be used in the analysis
// The list is stored in the global macro $vars (so it remains defined after
// this do-file ends). It is written under "#delimit ;" so that the quoted
// list can span several physical lines.
#delimit ;
global vars "CB CBG CT age_10_17_qu all all_c00 black_c00 hisp_c00 all_c10 
black_c10 hisp_c10 all_c02 black_c02 hisp_c02 blhi_qu_c00 blhi_qu_c10 blhi_qu_c02 
cb elec_id elec_yr pct_api pct_black pct_hisp pct_white pct_other 
pop_change pop_dec pop_dec_unrestricted 
reg reg_02 reg_0to3 reg_4to7 reg_8to11 reg_12to15 reg_1834 reg_3554 reg_55pl 
reg_dem reg_ind reg_rep reg_hisp reg_asian sample_main 
vote vote_0to3 vote_4to7 vote_8to11 vote_12to15 vote_1834 vote_3554  vote_55pl 
vote_dem vote_ind vote_rep vote_hisp vote_asian 
reg_black reg_blackhispanic reg_white reg_whiteasian reg_whiteasianoth 
vote_black vote_blackhispanic vote_white vote_whiteasian vote_whiteasianoth";
#delimit cr
// drop everything not named in $vars, then write the 2002-2010 analysis file
keep $vars
save ../data_intermediate/votecal_census_2002_2010.dta,replace


*** CREATE REG PANEL DATA 2002-2016
// Census-block x election-year skeleton, so that the merged panel has no
// missing CBs or years
use ../data_intermediate/main_census.dta, clear
// eight copies of every observation, numbered within cb as 2002, 2004, ..., 2016
// (assumes one row per cb in main_census -- TODO confirm)
expand 8
bysort cb: generate elec_yr = 2000 + 2*_n
generate geoid10 = cb
save ../temp/census_panel_02_16.dta, replace

// Load and clean VoteCal registration data for 2012-2016 (CSV block maps),
// reduce to one row per census block, and save one .dta file per year.
forval y = 2012(2)2016 {
	local yy = `y' - 2000	// two-digit year used in the CSV file name
	import delim ../raw/votecal/`y'/state_g`yy'_sr_blk_map.csv, clear ///
			stringcols(1/9) numericcols(10/14)
	keep county fips tract block block_key election blkreg blktotreg pctblk
	// block_key has error, but this can be fixed by using fips tract block
	keep if fips == "06037" 
	drop if tract=="0" & block=="0"
	count if length(block)!=4
	replace block = block + "00" if length(block)==2 // one error for 1 block in 2012
	assert length(tract)==6
	assert length(block)==4
	// 15-character block id: 5 (fips) + 6 (tract) + 4 (block)
	gen cb = fips  + tract + block
	// voters not linked to home address. The original compared against a
	// 12-character literal, which can never equal a 15-character cb, so the
	// drop never fired; compare against the all-zero tract/block id instead.
	drop if cb == "060370000000000"
	assert length(cb)==15
	// check blktotreg sums blkreg within CBs
	// (total() is the documented name of the legacy egen sum())
	bys cb: egen t = total(blkreg) 
	assert t == blktotreg
	drop t
	// collapse to one row per block; blktotreg is constant within cb (asserted above)
	bys cb: keep if _n==1
	drop blkreg pctblk
	ren blktotreg reg
	gen reg_ttl = reg
	gen elec_yr = `y'
	gen geoid10 = cb
	save ../raw/votecal/reg_g`y'.dta,replace
}
// Stack the 2002-2010 registration files; they carry no year variable, so
// each appended batch is tagged through its still-missing elec_yr.
use ../raw/votecal/reg_g2002.dta, clear
generate elec_yr = 2002
forvalues yr = 2004(2)2010 {
	append using ../raw/votecal/reg_g`yr'.dta
	replace elec_yr = `yr' if elec_yr==.
}
rename totreg_r reg
generate cb = geoid10
drop if cb==""
// the 2012-2016 files already contain reg, cb, geoid10 and elec_yr
forvalues yr = 2012(2)2016 {
	append using ../raw/votecal/reg_g`yr'.dta
}
tabulate elec_yr, missing
// merge onto the census skeleton and discard VoteCal rows with no census block
merge 1:1 geoid10 elec_yr using ../temp/census_panel_02_16.dta
keep if inlist(_merge, 2, 3)
tabulate elec_yr _merge, missing
drop _merge
// census-only rows have no registrants on file: fill in zeroes
replace reg = 0 if reg==.
capture drop elec_id
generate elec_id = (elec_yr - 1998)/2
keep cb CB CBG CT elec_yr elec_id ///
	all all_c00 black_c00 hisp_c00 ///
	all_c10 black_c10 hisp_c10 ///
	all_c02 black_c02 hisp_c02 ///
	blhi_qu_c00 blhi_qu_c10 blhi_qu_c02 reg
save ../data_intermediate/reg_2002_2016.dta, replace



